set seed 870418 // fix the seed so that the random sampling by year is the same on each re-run

* Clear any estimates previously stored with eststo
eststo clear

{ // Full notification sample (also create sample weights for EE-sample)
*===============================================================================
* Load the notification data and keep the event-time-zero observation
* for the years 2005-2016
use "$datapath/A3_gen.dta", clear
keep if eventtime_def==0 & inrange(year,2005,2016)

* Tenure at notification divided by 12 (labelled in years in the exported table)
generate tenure = tenureatnot/12

{ // Generate industry indicators
*===============================================================================
* Maps the first two characters of the string codes astsni02 (SNI2002) and
* astsni07 (SNI2007) to SNI2007-style section letters stored in SNI_1dig,
* then creates one 0/1 indicator per industry group.
gen sni02 = substr(astsni02,1,2)
gen sni07 = substr(astsni07,1,2)

destring sni02 sni07, replace

*If both sni02 and sni07 exist, let sni07 have priority
replace sni02=. if sni07!=.
*A two-digit code of 0 is treated as missing
replace sni02=. if sni02==0
replace sni07=. if sni07==0
* NOTE(review): sni02 is blanked whenever sni07 is non-missing, also when sni07
* is 0 and is itself set to missing one line later. Confirm this is intended.

*Follows the letters of SNI07 and sometimes disaggregates to SNI02.
gen str	SNI_1dig = ""
replace SNI_1dig = "A" 	if inrange(sni02,1,5) | inrange(sni07,1,3)
replace SNI_1dig = "B"  if inrange(sni02,10,14) | inrange(sni07,5,9)
* SNI2002 manufacturing spans divisions 15-37 (was 15-33, leaving 34-37 unclassified)
replace SNI_1dig = "C"  if inrange(sni02,15,37) | inrange(sni07,10,33)
* SNI2002 electricity, gas and water supply is 40-41 (was inrange(sni02,40,21), never true)
replace SNI_1dig = "DE" if inrange(sni02,40,41) | inrange(sni07,35,39)
replace SNI_1dig = "F"  if inrange(sni02,45,45) | inrange(sni07,41,43)
replace SNI_1dig = "G"  if inrange(sni02,50,52) | inrange(sni07,45,47)
replace SNI_1dig = "I"  if inrange(sni02,55,55) | inrange(sni07,55,56)
replace SNI_1dig = "HJ" if inrange(sni02,60,64) | inrange(sni07,49,53) | inrange(sni07,58,63)
replace SNI_1dig = "K"  if inrange(sni02,65,67) | inrange(sni07,64,66)
* SNI2002 business services are 70-74 and public administration is 75
* (was LMN 70-75 with O overriding 74, which swapped divisions 74 and 75)
replace SNI_1dig = "LMN" if inrange(sni02,70,74) | inrange(sni07,68,82)
replace SNI_1dig = "O"  if inrange(sni02,75,75) | inrange(sni07,84,84)
replace SNI_1dig = "P"  if inrange(sni02,80,80) | inrange(sni07,85,85)
replace SNI_1dig = "Q"  if inrange(sni02,85,85) | inrange(sni07,86,88)
replace SNI_1dig = "RS" if inrange(sni02,90,93) | inrange(sni07,90,96)
replace SNI_1dig = "T"  if inrange(sni02,95,95) | inrange(sni07,97,98)
replace SNI_1dig = "U"  if inrange(sni02,99,99) | inrange(sni07,99,99)

* One 0/1 indicator per section letter
gen agricu = SNI_1dig=="A"
gen mining = SNI_1dig=="B"
gen manufa = SNI_1dig=="C"
gen elecwat= SNI_1dig=="DE"
gen constr = SNI_1dig=="F"
gen retail = SNI_1dig=="G"
gen transp = SNI_1dig=="HJ"
gen financ = SNI_1dig=="K"
gen nonfin = SNI_1dig=="LMN"
gen public = SNI_1dig=="O"
gen educa  = SNI_1dig=="P"
gen human  = SNI_1dig=="Q"
gen enter  = SNI_1dig=="RS"

* Residual group: everything outside the ten industries reported in the table,
* i.e. sections A, B, DE, I, T, U and observations without a section letter
gen other =  manufa==0  & constr==0 & retail==0 & transp==0 & financ==0 & nonfin==0 & public==0 & educa==0 & human==0 & enter==0
}
*

* Express earnings and wages in thousands
foreach v of varlist annual_ear_prenot_def manl_prenot_def {
	replace `v' = `v'/1000
}

* Store the means for this sample as one table column and preview it
local sumvars female immigrant age_def tenure annual_ear_prenot_def manl_prenot_def educ1 educ2 educ3 manufa constr retail transp financ nonfin public educa human enter other firm_noemployed_RAMS
eststo: estpost sum `sumvars'
esttab, cells("mean(fmt(2))")

* Firm size averaged over one row per firm-year.
* THIS ONE HAS TO BE TYPED IN MANUALLY IN TABLE
bysort firmid year: generate temp = _n
summarize firm_noemployed_RAMS if temp==1





*Generate year weights
* freq becomes the percentage (0-100 scale) of notification-sample observations
* falling in each year. It is left in percent because it is later passed to
* the sample command, which expects a percentage.
preserve
	gen freq=1
	gcollapse (percent) freq ,by(year)
	save "$datapath/year_samplingweights.dta",replace
restore

*generate industry weights
* freq_industry is the share (0-1 scale) of notification-sample observations in
* each SNI_1dig group; it is later used as the numerator of the analytic weight.
preserve
	gen freq_industry =1
	gcollapse (percent) freq_industry, by(SNI_1dig)
	replace freq_industry = freq_industry/100
	save "$datapath/year_industrysamplingweights.dta",replace 
restore	
}
*
{ // Age 55-sample
*===============================================================================
* Load the age-55 sample and keep only the event-time-zero observation
use "$datapath/A3_age55data_WC.dta", clear
keep if eventtime_def==0

* Tenure at notification divided by 12 (labelled in years in the exported table)
generate tenure = tenureatnot/12


{ // Generate industry indicators
*===============================================================================
* Maps the first two characters of the string codes astsni02 (SNI2002) and
* astsni07 (SNI2007) to SNI2007-style section letters stored in SNI_1dig,
* then creates one 0/1 indicator per industry group.
gen sni02 = substr(astsni02,1,2)
gen sni07 = substr(astsni07,1,2)

destring sni02 sni07, replace

*If both sni02 and sni07 exist, let sni07 have priority
replace sni02=. if sni07!=.
*A two-digit code of 0 is treated as missing
replace sni02=. if sni02==0
replace sni07=. if sni07==0
* NOTE(review): sni02 is blanked whenever sni07 is non-missing, also when sni07
* is 0 and is itself set to missing one line later. Confirm this is intended.

*Follows the letters of SNI07 and sometimes disaggregates to SNI02.
gen str	SNI_1dig = ""
replace SNI_1dig = "A" 	if inrange(sni02,1,5) | inrange(sni07,1,3)
replace SNI_1dig = "B"  if inrange(sni02,10,14) | inrange(sni07,5,9)
* SNI2002 manufacturing spans divisions 15-37 (was 15-33, leaving 34-37 unclassified)
replace SNI_1dig = "C"  if inrange(sni02,15,37) | inrange(sni07,10,33)
* SNI2002 electricity, gas and water supply is 40-41 (was inrange(sni02,40,21), never true)
replace SNI_1dig = "DE" if inrange(sni02,40,41) | inrange(sni07,35,39)
replace SNI_1dig = "F"  if inrange(sni02,45,45) | inrange(sni07,41,43)
replace SNI_1dig = "G"  if inrange(sni02,50,52) | inrange(sni07,45,47)
replace SNI_1dig = "I"  if inrange(sni02,55,55) | inrange(sni07,55,56)
replace SNI_1dig = "HJ" if inrange(sni02,60,64) | inrange(sni07,49,53) | inrange(sni07,58,63)
replace SNI_1dig = "K"  if inrange(sni02,65,67) | inrange(sni07,64,66)
* SNI2002 business services are 70-74 and public administration is 75
* (was LMN 70-75 with O overriding 74, which swapped divisions 74 and 75)
replace SNI_1dig = "LMN" if inrange(sni02,70,74) | inrange(sni07,68,82)
replace SNI_1dig = "O"  if inrange(sni02,75,75) | inrange(sni07,84,84)
replace SNI_1dig = "P"  if inrange(sni02,80,80) | inrange(sni07,85,85)
replace SNI_1dig = "Q"  if inrange(sni02,85,85) | inrange(sni07,86,88)
replace SNI_1dig = "RS" if inrange(sni02,90,93) | inrange(sni07,90,96)
replace SNI_1dig = "T"  if inrange(sni02,95,95) | inrange(sni07,97,98)
replace SNI_1dig = "U"  if inrange(sni02,99,99) | inrange(sni07,99,99)

* One 0/1 indicator per section letter
gen agricu = SNI_1dig=="A"
gen mining = SNI_1dig=="B"
gen manufa = SNI_1dig=="C"
gen elecwat= SNI_1dig=="DE"
gen constr = SNI_1dig=="F"
gen retail = SNI_1dig=="G"
gen transp = SNI_1dig=="HJ"
gen financ = SNI_1dig=="K"
gen nonfin = SNI_1dig=="LMN"
gen public = SNI_1dig=="O"
gen educa  = SNI_1dig=="P"
gen human  = SNI_1dig=="Q"
gen enter  = SNI_1dig=="RS"

* Residual group: everything outside the ten industries reported in the table,
* i.e. sections A, B, DE, I, T, U and observations without a section letter
gen other =  manufa==0  & constr==0 & retail==0 & transp==0 & financ==0 & nonfin==0 & public==0 & educa==0 & human==0 & enter==0
}
*
* Express earnings and wages in thousands
foreach v of varlist annual_ear_prenot_def manl_prenot_def {
	replace `v' = `v'/1000
}

* Store the means for this sample as one table column and preview it
local sumvars female immigrant age_def tenure annual_ear_prenot_def manl_prenot_def educ1 educ2 educ3 manufa constr retail transp financ nonfin public educa human enter other firm_noemployed_RAMS
eststo: estpost sum `sumvars'
esttab, cells("mean(fmt(2))")

* Firm size averaged over one row per firm-year.
* THIS ONE HAS TO BE TYPED IN MANUALLY IN TABLE
bysort firmid year: generate temp = _n
summarize firm_noemployed_RAMS if temp==1

}
*
{ // All employed workers (weighted by percentage of observations each year in full notification sample)
*===============================================================================


{ // Appending years of Jobb data (2005 to 2016)
*===============================================================================

*Start from the 2005 Jobb file, reading only the variables needed
local jobbvars lopnr* year lonfink yrkstallnku astsni*
use `jobbvars' using "$rawdatapath/jobb_2005.dta", clear

*Stack the 2006-2016 files underneath, keeping the same variables
forvalues yr = 2006/2016 {
	append using "$rawdatapath/jobb_`yr'.dta", keep(`jobbvars')
}
}
*
keep if yrkstallnku=="2" // keep only employed workers (i.e. drop self-employed)
* Collapse to one row per person-year. Sorting by earnings within person-year
* first means that (last) picks the industry codes and firm id from the row
* with the highest earnings, while earnings themselves are summed over all rows.
sort lopnr year lonfink
fcollapse (last) astsni*  lopnr_peorgnr (sum) lonfink , by(lopnr year) fast

*Generate previous earnings
* Only filled in when the preceding row is exactly the previous calendar year.
* This is done before sampling so the lag can use every available year.
bys lopnr (year) : gen annual_ear_prenot_def = lonfink[_n-1] if year-1== year[_n-1]




* Attach freq, the percentage of notification-sample observations in each year
merge m:1 year using "$datapath/year_samplingweights.dta", nogen

*Sample so that years are in the same proportions as in the notifying sample
* For each year, sample keeps freq percent of that year's rows and leaves the
* other years untouched. The draw depends on the seed set at the top of the file.
tab year
	forvalues t = 2005(1)2016 {
		qui sum freq if year==`t'
		sample `r(mean)' if year==`t'
	}
tab year

*rename variables
* Use the identifier names expected by the datasets merged on below
rename lopnr persid
rename lopnr_peorgnr  firmid


{ // Generate industry indicators
*===============================================================================
* Maps the first two characters of the string codes astsni02 (SNI2002) and
* astsni07 (SNI2007) to SNI2007-style section letters stored in SNI_1dig,
* then creates one 0/1 indicator per industry group.
gen sni02 = substr(astsni02,1,2)
gen sni07 = substr(astsni07,1,2)

destring sni02 sni07, replace

*If both sni02 and sni07 exist, let sni07 have priority
replace sni02=. if sni07!=.
*A two-digit code of 0 is treated as missing
replace sni02=. if sni02==0
replace sni07=. if sni07==0
* NOTE(review): sni02 is blanked whenever sni07 is non-missing, also when sni07
* is 0 and is itself set to missing one line later. Confirm this is intended.

*Follows the letters of SNI07 and sometimes disaggregates to SNI02.
gen str	SNI_1dig = ""
replace SNI_1dig = "A" 	if inrange(sni02,1,5) | inrange(sni07,1,3)
replace SNI_1dig = "B"  if inrange(sni02,10,14) | inrange(sni07,5,9)
* SNI2002 manufacturing spans divisions 15-37 (was 15-33, leaving 34-37 unclassified)
replace SNI_1dig = "C"  if inrange(sni02,15,37) | inrange(sni07,10,33)
* SNI2002 electricity, gas and water supply is 40-41 (was inrange(sni02,40,21), never true)
replace SNI_1dig = "DE" if inrange(sni02,40,41) | inrange(sni07,35,39)
replace SNI_1dig = "F"  if inrange(sni02,45,45) | inrange(sni07,41,43)
replace SNI_1dig = "G"  if inrange(sni02,50,52) | inrange(sni07,45,47)
replace SNI_1dig = "I"  if inrange(sni02,55,55) | inrange(sni07,55,56)
replace SNI_1dig = "HJ" if inrange(sni02,60,64) | inrange(sni07,49,53) | inrange(sni07,58,63)
replace SNI_1dig = "K"  if inrange(sni02,65,67) | inrange(sni07,64,66)
* SNI2002 business services are 70-74 and public administration is 75
* (was LMN 70-75 with O overriding 74, which swapped divisions 74 and 75)
replace SNI_1dig = "LMN" if inrange(sni02,70,74) | inrange(sni07,68,82)
replace SNI_1dig = "O"  if inrange(sni02,75,75) | inrange(sni07,84,84)
replace SNI_1dig = "P"  if inrange(sni02,80,80) | inrange(sni07,85,85)
replace SNI_1dig = "Q"  if inrange(sni02,85,85) | inrange(sni07,86,88)
replace SNI_1dig = "RS" if inrange(sni02,90,93) | inrange(sni07,90,96)
replace SNI_1dig = "T"  if inrange(sni02,95,95) | inrange(sni07,97,98)
replace SNI_1dig = "U"  if inrange(sni02,99,99) | inrange(sni07,99,99)

* One 0/1 indicator per section letter
gen agricu = SNI_1dig=="A"
gen mining = SNI_1dig=="B"
gen manufa = SNI_1dig=="C"
gen elecwat= SNI_1dig=="DE"
gen constr = SNI_1dig=="F"
gen retail = SNI_1dig=="G"
gen transp = SNI_1dig=="HJ"
gen financ = SNI_1dig=="K"
gen nonfin = SNI_1dig=="LMN"
gen public = SNI_1dig=="O"
gen educa  = SNI_1dig=="P"
gen human  = SNI_1dig=="Q"
gen enter  = SNI_1dig=="RS"

* Residual group: everything outside the ten industries reported in the table,
* i.e. sections A, B, DE, I, T, U and observations without a section letter
gen other =  manufa==0  & constr==0 & retail==0 & transp==0 & financ==0 & nonfin==0 & public==0 & educa==0 & human==0 & enter==0
}
*




{ // Merging on covariates
*===============================================================================

{ // Merge on variables from LISA
*===============================================================================
*LISA 2000-2018: keep every master row, discard rows that exist only in LISA
merge m:1 persid year using "$datapath/A0_LISAind_2000_2018.dta", keep(master match) nogenerate

*Group the seven utbniv levels into four and create educ1, educ2, ... dummies
recode utbniv (1 2 = 1 "Primary School") (3 4 = 2 "High School") (5 6 = 3 "College") (7 = 4 "P.hD"), gen(utbniv2)
quietly tabulate utbniv2, gen(educ)
drop utbniv2*

}
*

{ // Merge on date of birth
*===============================================================================
*Keep every master row, discard persons found only in the birth-date file
*(original note: missing birth date for 513 people)
merge m:1 persid using "$datapath/A0_fodelsedatum.dta", keep(master match) nogenerate
label variable birth "Date of birth"

*Age is calendar year minus birth year; restrict to ages 16-74
generate age_def = year - year(dofm(birth))
keep if inrange(age_def,16,74)

}
*

{ // Merge on immigrant indicator
*===============================================================================
*Build a person-level file with the immigrant dummies in a temporary dataset
preserve
	use "$rawdatapath/DS_Lev_Fodelseland.dta", clear
	destring fodgreg fodgrmor fodgrfar, replace

	*Immigrant: own birth-region code is not 0
	generate immigrant = fodgreg!=0
	*Second generation: both parents' codes are not 0 while own code is 0
	generate immigrant_secgen = fodgrmor!=0 & fodgrfar!=0 & fodgreg==0

	keep lopnr_personnr imm*
	rename lopnr_personnr persid
	duplicates drop _all, force

	tempfile immigrant
	save `immigrant'
restore

*Attach to the working data, discarding persons found only in the lookup file
merge m:1 persid using `immigrant', keep(master match) nogenerate
}
*

{ // Merge on tenure
*===============================================================================
*Attach the DS tenure measure by person, firm and year; drop using-only rows
merge m:1 persid firmid year using "$datapath/A0_tenure_for_all.dta", keep(master match) nogenerate

*Same scaling as in the notification samples: tenure divided by 12
generate tenure = tenure_all/12

}
*

{ // Merge on wages
*===============================================================================
* Attach the wage variable by person and year.
merge m:1 persid year	using "$datapath/A1_clean_wagesurvey_00_18_persidyear.dta", keepusing(manl_notfirm_specific)
* The lag is taken BEFORE the using-only rows are dropped, so wage records from
* years in which the person is not in the working data can still serve as the
* previous observation.
* NOTE(review): unlike the earnings lag, there is no check that the previous row
* is exactly year-1, so this is the most recent earlier wage row for the person
* (which may itself be missing). Confirm this matches the t-1 label in the table.
bys persid (year) : gen manl_prenot_def=manl_notfirm_specific[_n-1]
drop if _merge==2 
drop 	_merge

}
*

{ // Inflation adjust earnings and wages to 2005 SEK
*===============================================================================
*Attach the yearly price index, discarding index years not in the data
merge m:1 year using "$datapath/cpi_jc.dta", keep(master match) nogenerate

*Deflate both monetary variables by the index of the observation year
replace annual_ear_prenot_def = annual_ear_prenot_def/cpi
replace manl_prenot_def = manl_prenot_def/cpi

}
*


{ // Merge on firm characteristics
*===============================================================================
*Original author's note: the firm characteristics come from RAMS data prepared
*	in A1_clean_firmchar.do and are said to exist only for 2004-2013, so
*	unmatched master rows are expected outside those years.
*NOTE(review): that note mentions years and a notified-only master that do not
*	match this 2005-2016 all-employed sample; verify it is still accurate.

*Keep every master row, discard firm-years found only in the firm file
merge m:1 firmid year using "$datapath/A1_firm_char.dta", keep(master match) nogenerate
}
*

}
*

*Generate industry weights for this large sample
* freq_industry_large is the share (0-1 scale) of the sampled all-employed
* observations in each SNI_1dig group; it is used as the denominator of the
* analytic weight in the weighted summary statistics.
preserve
	gen freq_industry_large =1
	gcollapse (percent) freq_industry_large, by(SNI_1dig)
	replace freq_industry_large = freq_industry_large/100
	save "$datapath/year_industrysamplingweights_large.dta",replace 
restore	

{ // merge on industry weights
********************************************************************************
* Attach the industry shares of the notification sample (freq_industry)
merge m:1 SNI_1dig using 	"$datapath/year_industrysamplingweights.dta"
* Show which industry groups in this sample have no counterpart in the
* notification sample. This must run BEFORE the unmatched rows are dropped;
* it previously ran after the keep and could never display anything.
tab SNI_1dig if _merge==1,m
keep if _merge==3
drop _merge

* Attach the industry shares of this large sample (freq_industry_large)
merge m:1 SNI_1dig using 	"$datapath/year_industrysamplingweights_large.dta"
keep if _merge==3
drop _merge
}
*


* Express earnings and wages in thousands
foreach v of varlist annual_ear_prenot_def manl_prenot_def {
	replace `v' = `v'/1000
}

local sumvars female immigrant age_def tenure annual_ear_prenot_def manl_prenot_def educ1 educ2 educ3 manufa constr retail transp financ nonfin public educa human enter other firm_noemployed_RAMS
* Weight that reshapes the industry mix to that of the notification sample
local indweight freq_industry/freq_industry_large

* Unweighted column
eststo: estpost sum `sumvars'
esttab, cells("mean(fmt(2))")

* Firm size averaged over one row per firm-year.
* THIS ONE HAS TO BE TYPED IN MANUALLY IN TABLE
bysort firmid year: generate temp = _n
summarize firm_noemployed_RAMS if temp==1

* Industry-reweighted column
eststo: estpost sum `sumvars' [aweight=`indweight']
esttab, cells("mean(fmt(2))")

* THIS ONE HAS TO BE TYPED IN MANUALLY IN TABLE
summarize firm_noemployed_RAMS if temp==1 [aweight=`indweight']
}
*

* Export the stored columns of means to a LaTeX table. The four eststo results
* saved above appear in storage order: full notification sample, age-55 sample,
* all employed workers (unweighted), all employed workers (industry-weighted).
* The column headers are hard-coded in prehead() and must be kept in that order.
* The firm-size row is overwritten by hand with the firm-year level means
* printed by the summarize commands above.
esttab using "$agetablepath/Summarystat/tab_workersummary.tex", replace booktabs nodepvars  nomtitles nolines	nonumbers nonotes /// 
cells("mean(fmt(2))")  substitute(\_ _) /// 
refcat(educ1 "\textit{Educational attainment}" manufa "\textit{Industry shares}" , nolabel)  ///
varlabels(  female "Female" immigrant "Immigrant" age_def "Age" tenure "Tenure (years)" annual_ear_prenot_def "Earnings$ _{t-1}$ (1,000 SEK) " manl_prenot_def "Wage$ _{t-1}$ (1,000 SEK) " educ1 "Compulsory" educ2 "Upper-secondary" educ3 "College"   firm_noemployed_RAMS "\addlinespace Firm size (\# employed)"  ///
		 manufa "Manufacturing" constr "Construction" retail "Wholesale and retail" ///
		transp "Transport" financ "Financial Services" nonfin "Non-Financial services" public "Public administration" educa "Education" ///
		human "Health care" enter "Entertainment" elecwat "Electricity" other "Other"  ) ///
	stats(  N, fmt( %9.0fc) labels(  "\midrule \# observations")) /// /// 
		prehead("\begin{table}[htbp]\centering " ///
"\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi} " ///
"\caption{\textsc{Descriptive Statistics} } " ///
"\begin{tabular}{l*{4}{c}} " ///
"\toprule" ///
"&\multicolumn{1}{c}{All Notified}&\multicolumn{1}{c}{Notified }&\multicolumn{1}{c}{Employed worker}&\multicolumn{1}{c}{Employed worker }\\ " ///
"&\multicolumn{1}{c}{}&\multicolumn{1}{c}{(Age 55-sample)}&\multicolumn{1}{c}{}&\multicolumn{1}{c}{  in same industry}\\ " ///
"&(1)&(2)&(3)&(4)\\ " ///
"\midrule" ///
) ///
postfoot("\bottomrule \end{tabular} \end{table}")



